package cn.ipanel.bigdata.example

import org.apache.spark.{SparkConf, SparkContext}

/**
 * A simple Spark job that reads a file from HDFS, runs WordCount over it,
 * and writes the result back to HDFS.
 */
object WordCount {
  def main(args: Array[String]): Unit = {
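    // local[2] runs Spark locally with two worker threads, which is convenient
    // for testing; for a real cluster run the master is usually supplied via
    // spark-submit rather than hardcoded here.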
    val conf = new SparkConf().setMaster("local[2]").setAppName("WordCount")

    val sc = new SparkContext(conf)
    // Load the HDFS file into an RDD of lines
    val lines = sc.textFile("/data/flume/kafka/20231205/FlumeData.1701788528421")
    // WordCount: split each line on commas, pair every token with 1,
    // and sum the counts per token
    val result = lines.flatMap(_.split(",")).map((_, 1)).reduceByKey(_ + _)
    // Save the result to HDFS; note that saveAsTextFile writes a directory of
    // part files at this path and fails if the path already exists
    result.saveAsTextFile("/data/flume/kafka/result.log")
    sc.stop()
  }
}
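
// A sketch of how this job might be submitted to a cluster once packaged;
// the JAR name and master are illustrative assumptions, not part of this repo:
//
//   spark-submit \
//     --class cn.ipanel.bigdata.example.WordCount \
//     --master yarn \
//     wordcount-example.jar
//
// The output lands as part-* files under /data/flume/kafka/result.log,
// one "(word,count)" tuple per line.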

